
import requests
import re
def get_html():
    """Download the ITHome article page and return its HTML as text.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.RequestException: if the request times out, the connection
            fails, or the server answers with a 4xx/5xx status.
    """
    url = 'https://www.ithome.com/0/759/777.htm'
    # Without a timeout requests waits indefinitely on a stalled server.
    resp = requests.get(url, timeout=10)
    # Fail loudly instead of handing an error page to the parser.
    resp.raise_for_status()
    # Force UTF-8 rather than relying on requests' guess from the headers.
    resp.encoding = 'utf-8'
    return resp.text
def pares_html(html: str) -> list:
    """Extract ``[name, amount]`` pairs from the article HTML.

    Names are the runs of CJK characters that open a
    ``<li><p data-vmark="...">`` item; amounts are the digit runs found
    immediately before the literal `` 亿美元`` marker. The two sequences are
    paired positionally and truncated to the shorter one.

    Args:
        html: The page markup to scan.

    Returns:
        A list of two-element lists ``[name, amount]``, both strings.
    """
    # Raw strings keep \w and \d as regex escapes instead of (invalid)
    # string escapes; the re module itself understands \uXXXX.
    entries = re.findall(
        r'</p></li><li><p data-vmark="([\w]*)">([\u4e00-\u9fa5]*)', html
    )
    # Only the second group is the name. The first group (the data-vmark id)
    # is ignored so it can never be mistaken for a name, and items whose text
    # does not start with CJK characters yield an empty name and are skipped.
    names = [name for _vmark, name in entries if name]

    # NOTE: `*` permits an empty capture when no digit directly precedes the
    # marker; kept as-is so the positional pairing with names is unchanged.
    amounts = re.findall(r'([\d]*) 亿美元', html)

    return [[name, amount] for name, amount in zip(names, amounts)]


